First, Importing Important Libraries¶

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
from plotly.subplots import make_subplots
import plotly.express as px
import colorama
from colorama import Fore
import matplotlib.pyplot as plt
from termcolor import colored

The daframe used here has been preprocessed and cleaned earlier making it simpler to read and visualize the data¶

...... Use right arrow-key for next slide...-->

In [3]:
df = pd.read_csv('movie_data_preprocessed.csv')

The search Algorithms to enable searches based on:¶

Actor Name¶

  • Also possible to search by combination of different Actors within a movie

Search by Director Name¶

Search by Genres¶

  • Most of the search only allow you to seach by one genre,
    this algorithm permits you to search multiple genres within a movie
In [4]:
#search by genre
l = []
for i in df.Genre:
    for j in i.split(","):
        l.append(j.strip())
Available_Genres = list(set(l))

def genre_seach(s,df):
    if len(s)==3:
        a = s[0]
        b = s[1]
        c = s[2]
        data = df.iloc[:,:9].loc[(df[a]==True) & (df[b]==True) & (df[c]==True)].sort_values('Rating', ascending = False)
    elif len(s)==2:
        a = s[0]
        b = s[1]
        data = df.iloc[:,:9].loc[(df[a]==True) & (df[b]==True)].sort_values('Rating', ascending = False)
    elif len(s)==1:
        a = s[0]
        data = df.iloc[:,:9].loc[df[a]==True].sort_values('Rating', ascending = False)
    return data.set_index('Title')
In [5]:
#Actor Search search upto 4 actor simultaniously
def actor_search(df,s,n):
    if len(s)<=4:
        for i in range(0,(4-len(s))):
            s.append('Vola')
            
    a = df.loc[(df['actor_1'] == s[0]) | (df['actor_1'] == s[1]) | (df['actor_1'] == s[2]) | (df['actor_1'] == s[3])].index
    b = df.loc[(df['actor_2'] == s[0]) | (df['actor_2'] == s[1]) | (df['actor_2'] == s[2]) | (df['actor_2'] == s[3])].index
    c = df.loc[(df['actor_3'] == s[0]) | (df['actor_3'] == s[1]) | (df['actor_3'] == s[2]) | (df['actor_3'] == s[3])].index
    d = df.loc[(df['actor_4'] == s[0]) | (df['actor_4'] == s[1]) | (df['actor_4'] == s[2]) | (df['actor_4'] == s[3])].index
    l = a.append(b).append(c).append(d)
    a = list(set(l))
    if n==1:
        print('Total Number of Movies : ',len(a))
        return df.iloc[:,:9].iloc[a].sort_values('Rating', ascending = False)
    else:
        return df.iloc[:,:9].iloc[a].sort_values('Rating', ascending = False)
In [6]:
#search by combining actors(all in one movie)
def actor_combination(df,b):
    ok = df.copy()
    df.Stars = df.Stars.apply(lambda x: x.title())
    c = []
    star = df.Stars.tolist()
    for i in df.Stars:
        if len(i.split(','))==4:
            a = []
            a.append(i.split(',')[0].strip())
            a.append(i.split(',')[1].strip())
            a.append(i.split(',')[2].strip())
            a.append(i.split(',')[3].strip())
            if len(set(b)-set(a)) == 0:
                c.append(star.index(i))
            else:
                del a
        else:
            a = []
            a.append(i.split(',')[0].strip())
            a.append(i.split(',')[1].strip())
            a.append(i.split(',')[2].strip())
            if len(set(b)-set(a)) == 0:
                c.append(star.index(i))
            else:
                del a
    print('Total Number of Movies : ',len(c))
    return ok.iloc[:,:9].iloc[c].sort_values('Rating', ascending = False).set_index('Title')
In [7]:
#search movie by director
def director_search(df,l):
    b = 0
    a = []
    for i in df.Directors:
        if len(set(l)-set([x.strip() for x in i.split(',')])) == 0:
            a.append(b)
        b = b+1
    print('Total Number of Movies : ',len(a))
    return df.iloc[:,:9].iloc[a].sort_values('Rating', ascending = False).set_index('Title')


Visualizing the data¶



Below Shows the list of Most Busy Directors of the decade¶

In [8]:
a = []
b = []
c = []
for i in df.Main_Directors.unique():
    a.append(len(df.loc[df['Main_Directors']==i]))
    c.append(round(np.mean(df['Rating'].loc[df['Main_Directors']==i]),2))
    b.append(i)
ind = pd.Index(list(range(1,len(a)+1)))
data = pd.DataFrame(zip(b,a,c),columns = ['Director','Movie_count','Avg_Rating']).sort_values('Movie_count', ascending = False).set_index(ind)
print('\n')
str1 = '~'*15+' The Most busy Director '+'~'*15
print(Fore.CYAN + colored(str1, attrs=['bold']))
display(data.head(10))

~~~~~~~~~~~~~~~ The Most busy Director ~~~~~~~~~~~~~~~
Director Movie_count Avg_Rating
1 Ridley Scott 13 6.89
2 Steven Spielberg 12 7.20
3 Antoine Fuqua 11 6.57
4 Shawn Levy 10 6.34
5 Michael Bay 10 6.31
6 Clint Eastwood 10 7.63
7 Zack Snyder 9 6.87
8 Guy Ritchie 9 7.13
9 Tim Burton 9 6.94
10 Ron Howard 8 6.99

Profit & Loss¶


Profit made by the movie would be the difference between amount put into production and amount made by sales and theater.

The list underneath shows the top 10 gained movies and tail(10) movies

In [9]:
#Movies with HIghest LOss and Gains
dfn = df[['Title','Rating','Main_Directors','new_budget','new_Income']].loc[df['new_Income']>0]
dfn['new_budget'] = round(dfn['new_budget']/1000000,2)
dfn['new_Income'] = round(dfn['new_Income']/1000000,2)
dfn['Amount_Gained'] = dfn['new_Income']-dfn['new_budget']
#dfn.head(10)
a = dfn.sort_values('Amount_Gained', ascending = False).reset_index().drop('index',axis=1)
b = dfn.sort_values('Amount_Gained').reset_index().drop('index',axis=1)
a.index += 1 
b.index += 1 
print('\n')
str1 = '~'*15+' Movies with biggest gains '+'~'*15
print(Fore.GREEN + colored(str1, attrs=['bold']))
display(a.head(10))
print('\n')
str1 = '~'*15+' Movies with biggest losses '+'~'*15
print(Fore.RED + colored(str1, attrs=['bold']))
display(b.head(10))

~~~~~~~~~~~~~~~ Movies with biggest gains ~~~~~~~~~~~~~~~
Title Rating Main_Directors new_budget new_Income Amount_Gained
1 Avatar 7.8 James Cameron 237.0 2922.92 2685.92
2 Avengers: Endgame 8.4 Anthony Russo 356.0 2797.50 2441.50
3 Avatar: The Way of Water 7.8 James Cameron 350.0 2267.95 1917.95
4 Star Wars: Episode VII - The Force Awakens 7.8 J J Abrams 245.0 2069.52 1824.52
5 Avengers: Infinity War 8.4 Anthony Russo 321.0 2048.36 1727.36
6 Spider-Man: No Way Home 8.3 Jon Watts 200.0 1917.43 1717.43
7 Jurassic World 6.9 Colin Trevorrow 150.0 1671.54 1521.54
8 The Lion King 6.8 Jon Favreau 260.0 1663.25 1403.25
9 Furious 7 7.1 James Wan 190.0 1515.34 1325.34
10 Top Gun: Maverick 8.4 Joseph Kosinski 170.0 1488.73 1318.73

~~~~~~~~~~~~~~~ Movies with biggest losses ~~~~~~~~~~~~~~~
Title Rating Main_Directors new_budget new_Income Amount_Gained
1 The Gray Man 6.5 Anthony Russo 200.0 0.45 -199.55
2 The Tomorrow War 6.5 Chris McKay 200.0 14.40 -185.60
3 Red Notice 6.3 Rawson Marshall Thurber 160.0 0.18 -159.82
4 The Irishman 7.8 Martin Scorsese 159.0 0.97 -158.03
5 Pinocchio 2 5.1 Robert Zemeckis 150.0 0.04 -149.96
6 Mulan 5.7 Niki Caro 200.0 69.97 -130.03
7 White Noise 6.6 Noah Baumbach 80.0 0.07 -79.93
8 Don't Look Up 7.2 Adam McKay 75.0 0.79 -74.21
9 How Do You Know 5.4 James L Brooks 120.0 48.67 -71.33
10 Devotion 7.0 J D Dillard 90.0 19.95 -70.05

Genre Distribution¶

  • Several Movies are released every year in verious Genres, the graph below depicts the amount of movies made in different genres
In [10]:
dfn = df.iloc[:,15:34].astype(int)
a = []
b = []
for i in dfn.columns:
    a.append(dfn[i].sum())
    b.append(i)
dfn = pd.DataFrame(zip(b,a),columns = ['Genre','Count']).sort_values('Count',ascending = False).reset_index().drop('index',axis=1)
import matplotlib.pyplot as plt
import seaborn as sns
fig, ax = plt.subplots(figsize=(10, 5))
sns.color_palette("tab10")
my_plot = sns.barplot(y = dfn['Count'], x = dfn['Genre'])
my_plot.set_xticklabels(my_plot.get_xticklabels(), rotation=45)
ax.bar_label(ax.containers[0])
my_plot.set(xlabel=None)
my_plot.set_title('Movies in current decade per Genre')
Out[10]:
Text(0.5, 1.0, 'Movies in current decade per Genre')

Long and short Movies¶

  • Sometimes you want to be entertained with short movies because time is your constraint then,
  • ater times you want to kill time with long hours of a movie

The graph with a list underneath makes it easier to choose one on the Loong movies or the short once.

In [11]:
ok = df.iloc[:,:9].query("Runtime > 0").sort_values('Runtime', ascending = False)

fig1 = px.bar(ok.head(5), y='Runtime', x='Title',
            title="Movies by Runtime",
            color = 'Genre', text = 'Title')
fig2 = px.bar(ok.tail(5), y='Runtime', x='Title',
            color = 'Genre', text = 'Title',)
fig1.update_xaxes(visible=False)
fig2.update_xaxes(visible=False)


fig1.show()
fig2.show()
print('\n')
str1 = '~'*45+' Top Loooonng Movies '+'~'*45
print(Fore.RED + colored(str1, attrs=['bold']))
display(df.iloc[:,:9].sort_values('Runtime', ascending = False).head(5).sort_values('Rating',ascending = False))
print('\n')
str1 = '~'*45+' The Shortest Movies '+'~'*45
print(Fore.GREEN + colored(str1, attrs=['bold']))
display(df.iloc[:,:9].query("Runtime > 0").sort_values('Runtime').head(5).sort_values('Rating',ascending = False))

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Top Loooonng Movies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Title Rating Year Month Certificate Runtime Directors Stars Genre
1903 The Lord of the Rings: The Return of the King 9.0 2003 December PG-13 201 Peter Jackson Elijah Wood, Viggo Mortensen, Ian McKellen, Or... Action, Adventure, Drama
123 Zack Snyder's Justice League 8.0 2021 March R 242 Zack Snyder Henry Cavill, Ben Affleck, Gal Gadot, Amy Adams Action, Adventure, Fantasy
315 The Irishman 7.8 2019 November R 209 Martin Scorsese Robert De Niro, Al Pacino, Joe Pesci, Harvey K... Biography, Crime, Drama
0 Avatar: The Way of Water 7.8 2022 December PG-13 192 James Cameron Sam Worthington, Zoe Saldana, Sigourney Weaver... Action, Adventure, Fantasy
1564 Grindhouse 7.5 2007 April R 191 Robert Rodriguez, Eli Roth, Quentin Tarantino,... Kurt Russell, Rose McGowan, Danny Trejo, Zo Bell Action, Horror, Thriller

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The Shortest Movies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Title Rating Year Month Certificate Runtime Directors Stars Genre
995 Banshee 8.5 2013 September Unrated 50 Gemma Mc Carthy Jonathan O Dwyer, Sean Flood, Frank Hurley, Fi... Action
477 The VelociPastor 5.0 2018 August Unrated 75 Brendan Steere Greg Cohan, George Schewnzer, Janice Young, Da... Action, Adventure, Comedy
1831 9 Songs 4.8 2004 March Unrated 71 Michael Winterbottom Kieran O Brien, Margo Stilley, Robert Levon Be... Drama, Music, Romance
1454 Superhero Movie 4.6 2008 March PG-13 75 Craig Mazin Drake Bell, Leslie Nielsen, Sara Paxton, Chris... Action, Comedy, Sci-Fi
578 Picture of Beauty 3.4 2017 April Unrated 70 Maxim Ford Taylor Sands, Danielle Rose, Pawel Hajnos, Mag... Drama, Romance

Different Country Contribution to the movies within the Dataframe¶

In [12]:
import squarify
a = df['Filming_location'].loc[df['Filming_location'] != 'Unknown'].value_counts().head(10)
plt.figure(figsize=(12,5))
squarify.plot(sizes=a.values.tolist(), label=a.index.tolist(), value=a.values.tolist(),
              color=[plt.cm.Set2(i) for i in range(7)],
              text_kwargs={'fontsize': 13.8})
plt.title('TOP 10 Country Praduction', fontsize=20,fontweight='bold')
plt.axis('off')
plt.show()

Mounthly release¶

  • Ever wondered which month has the highest release rates and how the other months would be compared to amount of releses,

    this graph would help you in better understanding this

In [13]:
a = df.Month.value_counts().index
b = df.Month.value_counts().values
data = pd.DataFrame(zip(a,b),columns = ['Month','count'])
data = data.iloc[[8,11,6,10,9,3,4,1,5,2,0,1]]
fig, ax = plt.subplots(figsize=(12, 5))
my_plot = sns.barplot(y = data['Month'],x = data['count'])
my_plot.bar_label(ax.containers[0])
my_plot.set(xlabel=None, ylabel=None)
my_plot.set_title('Movies praduced montly around the decade')
Out[13]:
Text(0.5, 1.0, 'Movies praduced montly around the decade')
In [14]:
from tabulate import tabulate
def print_tabular(ok,c1,c2):
    q = [i[0] for i in ok.index]
    q1 = [i for i in ok]
    return print (tabulate(zip(q,q1), headers=[c1, c2]))

The Busy Actors of the deccade¶

In [15]:
l = []
for i in df.Stars:
    for a in i.split(','):
        l.append(a.strip())
dfn = pd.DataFrame(l)
print('\n')
str1 = '~'*10+' Top 10 Busy Stars of the Decade '+'~'*10
print(Fore.RED + colored(str1, attrs=['bold']))

ok = dfn.value_counts().head(10)
print_tabular(ok,"Actor","Movie_Count")

~~~~~~~~~~ Top 10 Busy Stars of the Decade ~~~~~~~~~~
Actor                 Movie_Count
------------------  -------------
Dwayne Johnson                 25
Mark Wahlberg                  24
Ryan Reynolds                  23
Brad Pitt                      22
Amy Adams                      22
Matt Damon                     22
Scarlett Johansson             22
Liam Neeson                    21
Tom Hanks                      21
Cate Blanchett                 21

Movies with multiple origins are below¶

In [16]:
dfn = df[['Title','Rating','new_budget','Filming_location','origin_count']].sort_values('origin_count', ascending = False).head(10).reset_index().drop('index', axis =1)
dfn['Budget_Million'] = dfn['new_budget']/1000000
print('\n')
str1 = '~'*40+' Top Multi-Origin Movies '+'~'*40
print(Fore.BLUE + colored(str1, attrs=['bold']))
dfn.drop('new_budget',axis =1)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Top Multi-Origin Movies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Out[16]:
Title Rating Filming_location origin_count Budget_Million
0 Triangle of Sadness 7.6 Greece 10 10.70
1 Dogville 8.0 Sweden 9 10.00
2 Assassin's Creed 5.6 Spain 8 125.00
3 Valerian and the City of a Thousand Planets 6.4 France 7 177.20
4 The Hitman's Bodyguard 6.9 The Netherlands 7 30.00
5 Kingdom of Heaven 7.2 Morocco 7 130.00
6 Mr. Nobody 7.8 Canada 7 47.00
7 Brimstone 7.0 Hungary 7 12.84
8 Nymphomaniac: Vol. I 6.9 Germany 7 4.70
9 Valkyrie 7.1 Germany 7 75.00

TOP Animated movies of the decade¶

In [17]:
dfn = df.loc[df['Animation']==True].sort_values('new_Income', ascending = False).sort_values('Rating', ascending = False).reset_index().drop('index',axis=1)
print('\n')
str1 = '~'*50+' Top 12 Animation Movies '+'~'*50
print(Fore.YELLOW + colored(str1, attrs=['bold']))
dfn.iloc[:,:9].head(12)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Top 12 Animation Movies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Out[17]:
Title Rating Year Month Certificate Runtime Directors Stars Genre
0 WALL·E 8.4 2008 June G 98 Andrew Stanton Ben Burtt, Elissa Knight, Jeff Garlin, Fred Wi... Animation, Adventure, Family
1 Coco 8.4 2017 November PG 105 Lee Unkrich, Adrian Molina Anthony Gonzalez, Gael Garc a Bernal, Benjamin... Animation, Adventure, Comedy
2 Your Name. 8.4 2016 April PG 106 Makoto Shinkai Ry nosuke Kamiki, Mone Kamishiraishi, Ry Narit... Animation, Drama, Fantasy
3 Spider-Man: Into the Spider-Verse 8.4 2018 December PG 117 Bob Persichetti, Peter Ramsey, Rodney Rothman Shameik Moore, Jake Johnson, Hailee Steinfeld,... Animation, Action, Adventure
4 Toy Story 3 8.3 2010 June G 103 Lee Unkrich Tom Hanks, Tim Allen, Joan Cusack, Ned Beatty Animation, Adventure, Comedy
5 Up 8.3 2009 May PG 96 Pete Docter, Bob Peterson Edward Asner, Jordan Nagai, John Ratzenberger,... Animation, Adventure, Comedy
6 Howl's Moving Castle 8.2 2004 June PG 119 Hayao Miyazaki Chieko Baish , Takuya Kimura, Tatsuya Gash in,... Animation, Adventure, Family
7 Demon Slayer the Movie: Mugen Train 8.2 2020 April R 117 Haruo Sotozaki Natsuki Hanae, Akari Kit , Yoshitsugu Matsuoka... Animation, Action, Adventure
8 Finding Nemo 8.2 2003 May G 100 Andrew Stanton, Lee Unkrich Albert Brooks, Ellen DeGeneres, Alexander Goul... Animation, Adventure, Comedy
9 Inside Out 8.2 2015 June PG 95 Pete Docter, Ronnie Del Carmen Amy Poehler, Bill Hader, Lewis Black, Mindy Ka... Animation, Adventure, Comedy
10 How to Train Your Dragon 8.1 2010 March PG 98 Dean DeBlois, Chris Sanders Jay Baruchel, Gerard Butler, Christopher Mintz... Animation, Action, Adventure
11 A Silent Voice: The Movie 8.1 2016 September Unrated 130 Naoko Yamada Miyu Irino, Saori Hayami, Aoi Y ki, Kensh Ono Animation, Drama

BEST ACTORs by Data Science:-¶

In [18]:
a,r,g,bug = ([] for i in range(4))
for n,i in enumerate(df.Stars):
  for j in i.split(','):
    a.append(j.strip())
    r.append(df.Rating.iloc[n])
    g.append(df.Genre.iloc[n])
    bug.append(df.new_budget.iloc[n])
ns = pd.DataFrame(zip(a,r,g,bug), columns = ['Star','Rating','Genre','Budget'])
#ns.shape
a,b,c,d = ([] for i in range(4))
for i in ns.Star.unique():
    a.append(ns['Genre'].loc[ns['Star']==i].tolist())
    b.append(i)
    c.append(round(ns['Rating'].loc[ns['Star']==i].mean(),2))
    d.append(round(ns['Budget'].loc[ns['Star']==i].mean()/1000000,2))
ns1 = pd.DataFrame(zip(b,c,a,d), columns = ['Star','Rating','Genre','Budget'])    
#ns1.sample(3)
b,a,v,q = ([] for i in range(4))
for i in ns1['Genre']:
    a = []
    for x in i:
        for j in x.split(','):            
            a.append(j.strip())
    b.append(list(set(a)))
    v.append(len(set(a)))
    
for s in ns1.Star:
    q.append(len(actor_search(df,[s],0)))
    
ns1['All_Genre'] = b
ns1['Genre_Count'] = v
ns1['Movie_Count'] = q
del ns
ns1.drop('Genre',axis =1, inplace = True)
display(ns1.shape)
#ns1.sample(5)
(3502, 6)
In [33]:
from IPython.display import Image
Image(filename='actor_award.gif')
Out[33]:
<IPython.core.display.Image object>
In [34]:
from IPython.display import Image
Image(filename='R.png')
Out[34]:

AwardUrl

In [19]:
from pandas import option_context
with option_context('display.max_colwidth', None):
    print("Budget in Million $")
    print('\n')
    str2 = '~'*45+' Actors Trusted with High Budget '+'~'*45
    print(Fore.MAGENTA + colored(str2, attrs=['bold']))
    display(ns1.sort_values(by = 'Budget', ascending = False).head(5).set_index("Star").T)
    print('\n')
    str1 ='~'*45+' Most Varsatile Actor '+'~'*45
    print(Fore.CYAN + colored(str1, attrs=['bold']))
    display(ns1.sort_values(by = 'Genre_Count', ascending = False).head(1).set_index("Star").T)
    print('\n')
    str3 = '~'*45+' Actor with maax Movies '+'~'*45
    print(Fore.RED + colored(str3, attrs=['bold'])) 
    display(ns1.sort_values(by = 'Movie_Count', ascending = False).head(1).set_index("Star").T)
    print('\n')
    str4 = '~'*45+' Most consistent Performance '+'~'*45
    print(Fore.GREEN + colored(str4, attrs=['bold']))
    display(ns1.loc[ns1['Movie_Count']>10].sort_values(by = 'Rating', ascending = False).head(1).set_index("Star").T)
Budget in Million $


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Actors Trusted with High Budget ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Star Mark Hamill Carrie Fisher Daisy Ridley Brandon Routh Donald Glover
Rating 6.9 6.9 7.07 6.1 6.85
Budget 317.0 317.0 279.0 270.0 267.5
All_Genre [Adventure, Action, Fantasy] [Adventure, Action, Fantasy] [Adventure, Sci-Fi, Action, Fantasy] [Adventure, Action, Sci-Fi] [Sci-Fi, Adventure, Action, Drama, Animation]
Genre_Count 3 3 4 3 5
Movie_Count 1 1 3 1 2

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Most Varsatile Actor ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Star Samuel L Jackson
Rating 6.87
Budget 78.24
All_Genre [Crime, Thriller, Sci-Fi, Biography, Adventure, Sport, Family, Horror, Action, Mystery, Fantasy, Music, Comedy, Drama, Animation]
Genre_Count 15
Movie_Count 21

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Actor with maax Movies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Star Dwayne Johnson
Rating 6.46
Budget 113.64
All_Genre [Thriller, Crime, Sci-Fi, Biography, Adventure, Horror, Action, Fantasy, Comedy, Drama, Animation]
Genre_Count 11
Movie_Count 25

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Most consistent Performance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Star Leonardo DiCaprio
Rating 7.84
Budget 96.15
All_Genre [Crime, Thriller, Romance, Sci-Fi, Biography, Adventure, Western, Action, Mystery, Comedy, Drama]
Genre_Count 11
Movie_Count 13
In [20]:
from pandas import option_context
with option_context('display.max_colwidth', None):
    print("Budget in Million $")
    print('\n')
    str2 = '~'*15+' Actors Trusted with High Budget '+'~'*15
    print(Fore.MAGENTA + colored(str2, attrs=['bold']))
    display(ns1.sort_values(by = 'Budget', ascending = False).head(5).set_index("Star").T)
Budget in Million $


~~~~~~~~~~~~~~~ Actors Trusted with High Budget ~~~~~~~~~~~~~~~
Star Mark Hamill Carrie Fisher Daisy Ridley Brandon Routh Donald Glover
Rating 6.9 6.9 7.07 6.1 6.85
Budget 317.0 317.0 279.0 270.0 267.5
All_Genre [Adventure, Action, Fantasy] [Adventure, Action, Fantasy] [Adventure, Sci-Fi, Action, Fantasy] [Adventure, Action, Sci-Fi] [Sci-Fi, Adventure, Action, Drama, Animation]
Genre_Count 3 3 4 3 5
Movie_Count 1 1 3 1 2
In [21]:
print('\n')
str4 = '~'*15+' Most consistent Performance '+'~'*15
print(Fore.GREEN + colored(str4, attrs=['bold']))
display(ns1.loc[ns1['Movie_Count']>10].sort_values(by = 'Rating', ascending = False).head(1).set_index("Star").T)

~~~~~~~~~~~~~~~ Most consistent Performance ~~~~~~~~~~~~~~~
Star Leonardo DiCaprio
Rating 7.84
Budget 96.15
All_Genre [Crime, Thriller, Romance, Sci-Fi, Biography, ...
Genre_Count 11
Movie_Count 13

How The Income can be compared to the budget of a movie¶

In [22]:
from IPython.display import Image
Image(filename='gains.png')
Out[22]:
In [25]:
x = df['Rating']
y = df['new_Income']/1000000
z = df['new_budget']/1000000


c = [i.split(',')[0] for i in df['Genre']]
fig = px.scatter(x=z, y=y, title='Gains over Budget comparision of all the movies ', color=c,
                 width=750, height=600)
fig.update_layout(
    xaxis_title="Budget",
    yaxis_title="Income",
    legend_title="Genres",)

import seaborn as sns
plt.figure(figsize=(8,6))
palette = sns.color_palette("tab10", as_cmap=True)
#display(sns.lineplot(y=y,x=x))
#display(sns.lineplot(y=z,x=x))
sns.lineplot(x=z,y=y,palette=palette)
fig.show()
C:\Users\rishi\AppData\Local\Temp\ipykernel_15572\682156227.py:19: UserWarning:

Ignoring `palette` because no `hue` variable has been assigned.

In [27]:
sns.lineplot(x=z,y=y)
plt.figure(figsize=(8,6))
Out[27]:
<Figure size 800x600 with 0 Axes>
<Figure size 800x600 with 0 Axes>
In [28]:
#Making Search Algorithms ever easier
def search_(s):
    l = []
    for j in s.split(','):
        i = j.strip()
        l.append(i.title())
    return l
def search_in_genre(s):
    l = search_(s)
    return genre_seach(l,df)
def search_by_actor(s):
    b = search_(s)
    return actor_combination(df,b)
def search_by_director(s):
    return actor_combination(df,search_(s))

Embeded Search function¶

Returns Table Sorted By Top Rating First

Search Multiple Genre¶

  • search_in_genre(string) --> pass Generes as strings seperated by comma #### Search By Different Actor Combination
  • search_by_actor(string) --> pass Actor names as strings seperated by comma #### Search By Director
  • search_by_director(string) --> pass Director name as string


List of all available Genrese¶

In [29]:
# List of all available Genres
l = []
for i in df.Genre:
    for j in i.split(","):
        l.append(j.strip())
Available_Genres = list(set(l))
ind = list(range(1,len(Available_Genres)+1))
print(tabulate(zip(ind,Available_Genres), headers=['S.No.','List of Available Genres'], tablefmt="grid"))
+---------+----------------------------+
|   S.No. | List of Available Genres   |
+=========+============================+
|       1 | Thriller                   |
+---------+----------------------------+
|       2 | Romance                    |
+---------+----------------------------+
|       3 | Biography                  |
+---------+----------------------------+
|       4 | Family                     |
+---------+----------------------------+
|       5 | Animation                  |
+---------+----------------------------+
|       6 | History                    |
+---------+----------------------------+
|       7 | Musical                    |
+---------+----------------------------+
|       8 | Drama                      |
+---------+----------------------------+
|       9 | Crime                      |
+---------+----------------------------+
|      10 | War                        |
+---------+----------------------------+
|      11 | Sci-Fi                     |
+---------+----------------------------+
|      12 | Adventure                  |
+---------+----------------------------+
|      13 | Action                     |
+---------+----------------------------+
|      14 | Music                      |
+---------+----------------------------+
|      15 | Fantasy                    |
+---------+----------------------------+
|      16 | Sport                      |
+---------+----------------------------+
|      17 | Horror                     |
+---------+----------------------------+
|      18 | Western                    |
+---------+----------------------------+
|      19 | Mystery                    |
+---------+----------------------------+
|      20 | Comedy                     |
+---------+----------------------------+

Search Exmaples¶

Click to view in the notebook button¶

In [30]:
search_in_genre('drama')
search_by_actor('Leonardo Dicaprio')
Total Number of Movies :  13
Out[30]:
Rating Year Month Certificate Runtime Directors Stars Genre
Title
Inception 8.8 2010 July PG-13 148 Christopher Nolan Leonardo DiCaprio, Joseph Gordon Levitt, Ellio... Action, Adventure, Sci-Fi
The Departed 8.5 2006 October R 151 Martin Scorsese Leonardo DiCaprio, Matt Damon, Jack Nicholson,... Crime, Drama, Thriller
Django Unchained 8.4 2012 December R 165 Quentin Tarantino Jamie Foxx, Christoph Waltz, Leonardo DiCaprio... Drama, Western
The Wolf of Wall Street 8.2 2013 December R 180 Martin Scorsese Leonardo DiCaprio, Jonah Hill, Margot Robbie, ... Biography, Comedy, Crime
Shutter Island 8.2 2010 February R 138 Martin Scorsese Leonardo DiCaprio, Emily Mortimer, Mark Ruffal... Mystery, Thriller
The Revenant 8.0 2015 January R 156 Alejandro G I rritu Leonardo DiCaprio, Tom Hardy, Will Poulter, Do... Action, Adventure, Drama
Blood Diamond 8.0 2006 December R 143 Edward Zwick Leonardo DiCaprio, Djimon Hounsou, Jennifer Co... Adventure, Drama, Thriller
Once Upon a Time in Hollywood 7.6 2019 July R 161 Quentin Tarantino Leonardo DiCaprio, Brad Pitt, Margot Robbie, E... Comedy, Drama
The Aviator 7.5 2004 December PG-13 170 Martin Scorsese Leonardo DiCaprio, Cate Blanchett, Kate Beckin... Biography, Drama
Revolutionary Road 7.3 2008 January R 119 Sam Mendes Leonardo DiCaprio, Kate Winslet, Christopher F... Drama, Romance
Don't Look Up 7.2 2021 December R 138 Adam McKay Leonardo DiCaprio, Jennifer Lawrence, Meryl St... Comedy, Drama, Sci-Fi
The Great Gatsby 7.2 2013 May PG-13 143 Baz Luhrmann Leonardo DiCaprio, Carey Mulligan, Joel Edgert... Drama, Romance
Body of Lies 7.0 2008 October R 128 Ridley Scott Leonardo DiCaprio, Russell Crowe, Mark Strong,... Action, Drama, Thriller
In [31]:
search_by_director('Sean Penn')
Total Number of Movies :  5
Out[31]:
Rating Year Month Certificate Runtime Directors Stars Genre
Title
Mystic River 7.9 2003 October R 138 Clint Eastwood Sean Penn, Tim Robbins, Kevin Bacon, Emmy Rossum Crime, Drama, Mystery
21 Grams 7.6 2003 January R 124 Alejandro G I rritu Sean Penn, Benicio Del Toro, Naomi Watts, Dann... Crime, Drama, Thriller
Licorice Pizza 7.2 2021 December R 133 Paul Thomas Anderson Alana Haim, Cooper Hoffman, Sean Penn, Tom Waits Comedy, Drama, Romance
The Tree of Life 6.8 2011 May PG-13 139 Terrence Malick Brad Pitt, Sean Penn, Jessica Chastain, Hunter... Drama, Fantasy
Gangster Squad 6.7 2013 January R 113 Ruben Fleischer Sean Penn, Ryan Gosling, Emma Stone, Giovanni ... Action, Crime, Drama
In [32]:
search_in_genre('adventure, biography')
Out[32]:
Rating Year Month Certificate Runtime Directors Stars Genre
Title
Into the Wild 8.1 2007 October R 148 Sean Penn Emile Hirsch, Vince Vaughn, Catherine Keener, ... Adventure, Biography, Drama
Papillon 7.2 2017 August R 133 Michael Noer Charlie Hunnam, Damijan Oklopdzic, Christopher... Adventure, Biography, Crime
Everest 7.1 2015 September PG-13 121 Baltasar Korm kur Jason Clarke, Ang Phula Sherpa, Thomas M Wrigh... Action, Adventure, Biography
Wild 7.1 2014 December R 115 Jean Marc Vall e Reese Witherspoon, Laura Dern, Gaby Hoffmann, ... Adventure, Biography, Drama
In the Heart of the Sea 6.9 2015 December PG-13 122 Ron Howard Chris Hemsworth, Cillian Murphy, Brendan Glees... Action, Adventure, Biography
The Lost City of Z 6.6 2016 April PG-13 141 James Gray Charlie Hunnam, Robert Pattinson, Sienna Mille... Adventure, Biography, Drama
In [33]:
search_by_actor('Leonardo Dicaprio, Christoph Waltz')
Total Number of Movies :  1
Out[33]:
Rating Year Month Certificate Runtime Directors Stars Genre
Title
Django Unchained 8.4 2012 December R 165 Quentin Tarantino Jamie Foxx, Christoph Waltz, Leonardo Dicaprio... Drama, Western